set.seed(15102020)
bootstraps <- 1000
data_path <- "./Data/"
Proj_name <- "BRC_growth_rate"
Browns <- RColorBrewer::brewer.pal(n = 9, "YlOrBr")[9:6]
Greens <- RColorBrewer::brewer.pal(n = 9, "YlGn")[9:6]
Blues <- RColorBrewer::brewer.pal(n = 9, "YlGnBu")[9:6]
Gradient.colours <- c(Browns[1], Greens[1], Browns[2], Greens[2], Browns[3], Greens[3], Browns[4], Greens[4], Blues)This script reproduces all sequence analysis steps and plots included in the paper plus some additional exploratory analyses.
OTUmat <- t(read.csv(paste0(data_path, "Shivta_site_otuTab2.txt"), header = TRUE, row.names = 1))
sort.order <- as.numeric( gsub( "OTU([0-9]+)", "\\1", colnames( OTUmat ) ) )
OTUmat <- OTUmat[, order( sort.order )]
Metadata <- read.csv(paste0(data_path, "Shivta_metadata.csv"), row.names = 1, header = TRUE)
read_csv(paste0(data_path, "Shivta_metadata.csv"),
trim_ws = TRUE) %>%
mutate_at(
c(
"Rock.type",
"Location"
),
~(factor(.))
) %>%
column_to_rownames("Sample.code") ->
Metadata
row.names(OTUmat) <- gsub("(.*)Nimrod[0-9]+|Osnat[0-9]+", "\\1", row.names( OTUmat))
Metadata <- Metadata[order(row.names(Metadata)), ]
OTUmat <- OTUmat[order(row.names(OTUmat)), ]
# calculate sample size
Metadata$Library.size = rowSums(OTUmat)
Metadata$Location.rock <- with(Metadata, Location:Rock.type)
# Load taxonomy data
tax.file <- "Shivta_site_silva.nrv119.taxonomy"
Taxonomy <- read.table(paste0(data_path, tax.file), stringsAsFactors = FALSE) # read taxonomy file
# count how many ';' in each cell and add up to 6
for (i in 1:nrow(Taxonomy)){
semicolons <- length(gregexpr(";", Taxonomy$V2[i] )[[1]])
if (semicolons < 6){
x <- paste0( rep("Unclassified;", 6 - semicolons ), collapse = "")
Taxonomy$V2[i] <- paste0( Taxonomy$V2[i], x, sep = "")
}
}
# split taxonomy to columns
do.call( "rbind", strsplit( Taxonomy$V1, ";", fixed = TRUE)) %>%
gsub( "size=([0-9]+)", "\\1", .) %>%
data.frame( ., do.call( "rbind", strsplit( Taxonomy$V2, ";", fixed = TRUE)), stringsAsFactors = F) %>%
apply(., 2, function(x) gsub( "\\(.*\\)", "", x)) %>%
replace(., . == "unclassified", "Unclassified") ->
Taxonomy
colnames( Taxonomy ) <- c( "OTU", "Frequency", "Domain", "Phylum", "Class", "Order", "Family", "Genus" )
# rownames(Taxonomy) <- colnames(Rock_weathering_OTUmat)
rownames(Taxonomy) <- Taxonomy[, 1]
Tree_IQ <- read_tree(paste0(data_path, "Shivta_site_otuReps.filtered.align.treefile"))
# generate phyloseq object
Ps_obj <- phyloseq(otu_table(OTUmat, taxa_are_rows = FALSE),
tax_table(Taxonomy[, -c(1, 2)]),
sample_data(Metadata),
phy_tree(Tree_IQ)
)
# Reorder factors for plotting
sample_data(Ps_obj)$Location %<>% fct_relevel("Slope", "City")Remove un- and mis-classified sequences, chloroplasts and mitochondria
Ps_obj_df <-
as.data.frame(sample_data(Ps_obj_filt)) # Put sample_data into a ggplot-friendly data.frame
Ps_obj_df <- Ps_obj_df[order(Ps_obj_df$Library.size), ]
Ps_obj_df$Index <- seq(nrow(Ps_obj_df))
ggplot(data = Ps_obj_df,
aes(x = Index, y = Library.size, color = Location.rock)) +
geom_point(size = 4) +
scale_colour_manual(values = ggpomological:::pomological_palette[c(2, 1, 9, 3)], name = "Location.rock")## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 23972 44989 54329 53254 61352 77463
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 3 70 191 1814 769 76180
prevdf <- apply(X = otu_table(Ps_obj_filt),
MARGIN = ifelse(taxa_are_rows(Ps_obj_filt), yes = 1, no = 2),
FUN = function(x){sum(x > 0)})
# Add taxonomy and total read counts to this data.frame
prevdf <- data.frame(Prevalence = prevdf,
TotalAbundance = taxa_sums(Ps_obj_filt),
tax_table(Ps_obj_filt))
prevdf %>%
group_by(Phylum) %>%
summarise(`Mean prevalence` = mean(Prevalence),
`Sum prevalence` = sum(Prevalence)) ->
Prevalence_phylum_summary
Prevalence_phylum_summary %>%
kable(., digits = c(0, 1, 0)) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), full_width = F)| Phylum | Mean prevalence | Sum prevalence |
|---|---|---|
| Acidobacteria | 11.7 | 152 |
| Actinobacteria | 16.3 | 3414 |
| Aquificae | 4.0 | 12 |
| Armatimonadetes | 7.6 | 76 |
| Bacteroidetes | 11.8 | 1027 |
| Candidate_division_KB1 | 8.0 | 8 |
| Candidate_division_OD1 | 4.0 | 4 |
| Candidate_division_OP11 | 4.5 | 9 |
| Candidate_division_TM7 | 7.9 | 158 |
| Chloroflexi | 11.0 | 1124 |
| Cyanobacteria | 14.9 | 506 |
| Deinococcus-Thermus | 15.8 | 174 |
| Euryarchaeota | 3.9 | 90 |
| Firmicutes | 13.4 | 214 |
| Gemmatimonadetes | 11.2 | 617 |
| Nitrospirae | 6.0 | 12 |
| Planctomycetes | 10.8 | 140 |
| Proteobacteria | 14.4 | 1817 |
| Tenericutes | 3.0 | 3 |
| Verrucomicrobia | 23.0 | 23 |
| WCHB1-60 | 7.0 | 28 |
prevdf %>%
group_by(Order) %>%
summarise(`Mean prevalence` = mean(Prevalence),
`Sum prevalence` = sum(Prevalence)) ->
Prevalence_Order_summary
Prevalence_Order_summary %>%
kable(., digits = c(0, 1, 0)) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), full_width = F)| Order | Mean prevalence | Sum prevalence |
|---|---|---|
| AKIW781 | 9.4 | 198 |
| AKYG1722 | 10.7 | 75 |
| AT425-EubC11_terrestrial_group | 10.2 | 378 |
| Acidimicrobiales | 14.7 | 368 |
| Alteromonadales | 12.0 | 12 |
| Aquificales | 4.0 | 12 |
| Ardenticatenales | 8.0 | 16 |
| B103G10 | 7.0 | 7 |
| BD2-11_terrestrial_group | 5.0 | 5 |
| Bacillales | 13.7 | 164 |
| Bacteroidales | 17.0 | 17 |
| Bdellovibrionales | 8.0 | 40 |
| Burkholderiales | 18.5 | 204 |
| C0119 | 5.0 | 5 |
| Caldilineales | 18.0 | 18 |
| Campylobacterales | 6.0 | 6 |
| Caulobacterales | 23.7 | 71 |
| Chromatiales | 20.0 | 20 |
| Chthoniobacterales | 23.0 | 23 |
| Clostridiales | 6.5 | 13 |
| Corynebacteriales | 8.0 | 40 |
| Cytophagales | 11.8 | 695 |
| Deinococcales | 15.1 | 151 |
| Desulfovibrionales | 5.0 | 5 |
| E6aD10 | 11.0 | 11 |
| EMP-G18 | 3.0 | 3 |
| Enterobacteriales | 22.0 | 44 |
| Euzebyales | 13.5 | 202 |
| Flavobacteriales | 12.7 | 38 |
| Frankiales | 20.9 | 356 |
| Gaiellales | 10.4 | 94 |
| Gemmatimonadales | 14.3 | 214 |
| Halobacteriales | 3.9 | 90 |
| JG30-KF-CM45 | 13.0 | 468 |
| Kineosporiales | 20.2 | 121 |
| Lactobacillales | 18.5 | 37 |
| Micrococcales | 18.0 | 198 |
| Micromonosporales | 19.5 | 39 |
| Myxococcales | 10.2 | 51 |
| Nitriliruptorales | 9.7 | 58 |
| Nitrosomonadales | 11.5 | 23 |
| Nitrospirales | 6.0 | 12 |
| Oceanospirillales | 16.0 | 16 |
| Orbales | 7.0 | 7 |
| Order_II | 4.8 | 24 |
| Order_III | 8.7 | 26 |
| PAUC43f_marine_benthic_group | 2.0 | 2 |
| Planctomycetales | 9.2 | 74 |
| Propionibacteriales | 14.5 | 318 |
| Pseudomonadales | 14.7 | 103 |
| Pseudonocardiales | 18.0 | 306 |
| Rhizobiales | 16.1 | 386 |
| Rhodobacterales | 14.8 | 163 |
| Rhodospirillales | 11.8 | 271 |
| Rickettsiales | 11.2 | 146 |
| Rubrobacterales | 22.3 | 491 |
| S0134_terrestrial_group | 18.0 | 18 |
| SBYG-4553 | 10.0 | 10 |
| Solirubrobacterales | 17.2 | 638 |
| Sphaerobacterales | 3.3 | 10 |
| Sphingobacteriales | 14.2 | 227 |
| Sphingomonadales | 19.6 | 196 |
| Streptomycetales | 24.5 | 49 |
| Streptosporangiales | 3.0 | 3 |
| Subgroup_3 | 15.0 | 15 |
| Subgroup_4 | 10.6 | 85 |
| Subgroup_6 | 13.0 | 52 |
| SubsectionI | 13.0 | 13 |
| SubsectionII | 15.8 | 315 |
| SubsectionIII | 14.8 | 118 |
| SubsectionIV | 13.5 | 54 |
| TRA3-20 | 14.0 | 14 |
| Thermales | 23.0 | 23 |
| Thermophilales | 15.0 | 30 |
| Unclassified | 9.2 | 642 |
| Unknown_Order | 9.8 | 98 |
| Vampirovibrionales | 6.0 | 6 |
| WD2101_soil_group | 13.0 | 39 |
| Xanthomonadales | 18.0 | 18 |
Based on that we will remove all phyla with a prevalence of under 8
Prevalence_phylum_summary %>%
filter(`Sum prevalence` < 8) %>%
dplyr::select(Phylum) %>%
map(as.character) %>%
unlist() ->
filterPhyla
Ps_obj_filt %<>% subset_taxa(!Phylum %in% filterPhyla)
sample_data(Ps_obj_filt)$Library.size <- rowSums(otu_table(Ps_obj_filt))
print(Ps_obj)## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 767 taxa and 25 samples ]
## sample_data() Sample Data: [ 25 samples by 6 sample variables ]
## tax_table() Taxonomy Table: [ 767 taxa by 6 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 767 tips and 765 internal nodes ]
## taxa are columns
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 732 taxa and 25 samples ]
## sample_data() Sample Data: [ 25 samples by 6 sample variables ]
## tax_table() Taxonomy Table: [ 732 taxa by 6 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 732 tips and 730 internal nodes ]
## taxa are columns
# Subset to the remaining phyla
prevdf_phylum_filt <- subset(prevdf, Phylum %in% get_taxa_unique(Ps_obj_filt, "Phylum"))
ggplot(prevdf_phylum_filt,
aes(TotalAbundance, Prevalence / nsamples(Ps_obj_filt), color = Phylum)) +
# Include a guess for parameter
geom_hline(yintercept = 0.05,
alpha = 0.5,
linetype = 2) + geom_point(size = 2, alpha = 0.7) +
scale_x_log10() + xlab("Total Abundance") + ylab("Prevalence [Frac. Samples]") +
facet_wrap( ~ Phylum) + theme(legend.position = "none")# Subset to the remaining phyla
prevdf_order_filt <- subset(prevdf, Order %in% get_taxa_unique(Ps_obj_filt, "Order"))
# grab the top 30 most abundant orders
prevdf_order_filt %>%
group_by(Order) %>%
summarise(Combined.abundance = sum(TotalAbundance)) %>%
arrange(desc(Combined.abundance)) %>%
.[1:30, "Order"] ->
Orders2plot
prevdf_order_filt2 <- subset(prevdf, Order %in% Orders2plot$Order)
ggplot(prevdf_order_filt2,
aes(TotalAbundance, Prevalence / nsamples(Ps_obj_filt), color = Order)) +
# Include a guess for parameter
geom_hline(yintercept = 0.05,
alpha = 0.5,
linetype = 2) + geom_point(size = 2, alpha = 0.7) +
scale_x_log10() + xlab("Total Abundance") + ylab("Prevalence [Frac. Samples]") +
facet_wrap( ~ Order) + theme(legend.position = "none")We will remove all sequences which appear in less than 10% of the samples
# Define prevalence threshold as 10% of total samples
prevalenceThreshold <- 0.1 * nsamples(Ps_obj_filt)
prevalenceThreshold## [1] 2.5
# Execute prevalence filter, using `prune_taxa()` function
keepTaxa <-
row.names(prevdf_phylum_filt)[(prevdf_phylum_filt$Prevalence >= prevalenceThreshold)]
Ps_obj_filt %<>% prune_taxa(keepTaxa, .)
sample_data(Ps_obj_filt)$Library.size <- rowSums(otu_table(Ps_obj_filt))
print(Ps_obj)## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 767 taxa and 25 samples ]
## sample_data() Sample Data: [ 25 samples by 6 sample variables ]
## tax_table() Taxonomy Table: [ 767 taxa by 6 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 767 tips and 765 internal nodes ]
## taxa are columns
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 711 taxa and 25 samples ]
## sample_data() Sample Data: [ 25 samples by 6 sample variables ]
## tax_table() Taxonomy Table: [ 711 taxa by 6 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 711 tips and 709 internal nodes ]
## taxa are columns
This removed 56 or 7% of the sequences.
First let’s look at the count data distribution after filtering:
sample_data(Ps_obj_filt) %>%
as_tibble() %>%
dplyr::select(Sample.name, Library.size) %>%
as(., "data.frame") %>%
kable(.) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), full_width = F)| Sample.name | Library.size |
|---|---|
| City Chalk 2 | 49453 |
| City Chalk 3 | 54157 |
| City Chalk 4 | 44943 |
| City Chalk 5 | 58835 |
| City Chalk 6 | 77463 |
| City Limestone 1 | 36122 |
| City Limestone 2 | 52816 |
| City Limestone 3 | 56370 |
| City Limestone 4 | 53092 |
| City Limestone 5 | 61350 |
| City Limestone 6 | 54508 |
| Slope Limestone 1 | 75580 |
| Slope Chalk 1 | 51652 |
| Slope Chalk 2 | 45201 |
| Slope Chalk 3 | 33632 |
| Slope Chalk 4 | 70890 |
| Slope Chalk 5 | 71460 |
| Slope Chalk 6 | 58961 |
| Slope Chalk 7 | 36759 |
| City Chalk 1 | 23968 |
| Slope Limestone 2 | 62460 |
| Slope Limestone 3 | 36635 |
| Slope Limestone 4 | 57291 |
| Slope Limestone 5 | 64511 |
| Slope Limestone 6 | 41565 |
The figure and table indicate only a small deviation in the number of reads per samples.
(mod1 <- adonis(
otu_table(Ps_obj_filt) ~ Library.size,
data = as(sample_data(Ps_obj_filt), "data.frame"),
method = "horn",
permutations = 9999
))##
## Call:
## adonis(formula = otu_table(Ps_obj_filt) ~ Library.size, data = as(sample_data(Ps_obj_filt), "data.frame"), permutations = 9999, method = "horn")
##
## Permutation: free
## Number of permutations: 9999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## Library.size 1 0.4358 0.43579 1.3712 0.05626 0.2005
## Residuals 23 7.3095 0.31781 0.94374
## Total 24 7.7453 1.00000
notAllZero <- (rowSums(t(otu_table(Ps_obj_filt))) > 0)
vsn::meanSdPlot(as.matrix(log2(t(otu_table(Ps_obj_filt))[notAllZero, ] + 1)))The difference in library sizes is low and its effect on the community composition is minimal. We’ll use the GMPR method for library size normalisation (Chen and Chen 2017)
Ps_obj_filt_GMPR <- Ps_obj_filt
Ps_obj_filt %>%
otu_table(.) %>%
t() %>%
as(., "matrix") %>%
GMPR() ->
GMPR_factors## Begin GMPR size factor calculation ...
## Completed!
## Please watch for the samples with limited sharing with other samples based on NSS! They may be outliers!
Ps_obj_filt %>%
otu_table(.) %>%
t() %*% diag(1 / GMPR_factors$gmpr) %>%
t() %>%
as.data.frame(., row.names = sample_names(Ps_obj_filt)) %>%
otu_table(., taxa_are_rows = FALSE) ->
otu_table(Ps_obj_filt_GMPR)
sample_data(Ps_obj_filt_GMPR)$Library.size <- sample_sums(Ps_obj_filt)
adonis(
otu_table(Ps_obj_filt_GMPR) ~ Library.size,
data = as(sample_data(Ps_obj_filt_GMPR), "data.frame"),
method = "horn",
permutations = 9999
)##
## Call:
## adonis(formula = otu_table(Ps_obj_filt_GMPR) ~ Library.size, data = as(sample_data(Ps_obj_filt_GMPR), "data.frame"), permutations = 9999, method = "horn")
##
## Permutation: free
## Number of permutations: 9999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## Library.size 1 0.4358 0.43579 1.3712 0.05626 0.2068
## Residuals 23 7.3095 0.31781 0.94374
## Total 24 7.7453 1.00000
Did it improve anything?
notAllZero <- (rowSums(t(otu_table(Ps_obj_filt_GMPR))) > 0)
vsn::meanSdPlot(as.matrix(log2(t(otu_table(Ps_obj_filt_GMPR))[notAllZero, ] + 1)))We do that by simulating 1000 rarefaction events and calculating the metrics each time. Then, the result is averaged.
rarefaction.mat <- matrix(0, nrow = nsamples(Ps_obj_filt), ncol = bootstraps)
rownames(rarefaction.mat) <- sample_names(Ps_obj_filt)
rich.ests <- list(S.obs = rarefaction.mat, S.chao1 = rarefaction.mat, se.chao1 = rarefaction.mat,
S.ACE = rarefaction.mat, se.ACE = rarefaction.mat)
for (i in seq(bootstraps)) {
sub.OTUmat <- rrarefy(otu_table(Ps_obj_filt), min(rowSums(otu_table(Ps_obj_filt))))
for (j in seq(length(rich.ests))) {
rich.ests[[j]][, i] <- t(estimateR(sub.OTUmat))[, j]
}
}
Richness <- data.frame(row.names = row.names(rich.ests[[1]]))
for (i in c(1, seq(2, length(rich.ests), 2))) {
S <- apply(rich.ests[[i]], 1, mean)
if (i == 1) {
se <- apply(rich.ests[[i]], 1, function(x) (mean(x)/sqrt(length(x))))
} else se <- apply(rich.ests[[i + 1]], 1, mean)
Richness <- cbind(Richness, S, se)
}
colnames(Richness) <- c("S.obs", "S.obs.se", "S.chao1", "S.chao1.se", "S.ACE", "S.ACE.se")
saveRDS(Richness, file = paste0("./", Proj_name))
write.csv(Richness, file = paste0("./", Proj_name))
ses <- grep("\\.se", colnames(Richness))
Richness[, ses] %>%
gather(key = "est.se") -> se.dat
Richness[, -unique(ses)] %>%
gather(key = "est") -> mean.dat
n <- length(unique(mean.dat$est))
# diversity indices
diversity.inds <- list(Shannon = rarefaction.mat, inv.simpson = rarefaction.mat, BP = rarefaction.mat)
for (i in seq(bootstraps)) {
sub.OTUmat <- rrarefy(otu_table(Ps_obj_filt), min(rowSums(otu_table(Ps_obj_filt))))
diversity.inds$Shannon[, i] <- diversityresult(sub.OTUmat, index = 'Shannon', method = 'each site', digits = 3)[, 1]
diversity.inds$inv.simpson[, i] <- diversityresult(sub.OTUmat, index = 'inverseSimpson', method = 'each site', digits = 3)[, 1]
diversity.inds$BP[, i] <- diversityresult(sub.OTUmat, index = 'Berger', method = 'each site', digits = 3)[, 1]
}
Diversity <- data.frame(row.names = row.names(diversity.inds[[1]]))
for (i in seq(length(diversity.inds))) {
S <- apply(diversity.inds[[i]], 1, mean)
se <- apply(diversity.inds[[i]], 1, function(x) (mean(x)/sqrt(length(x))))
Diversity <- cbind(Diversity, S, se)
}
colnames(Diversity) <- c("Shannon", "Shannon.se", "Inv.simpson", "Inv.simpson.se", "BP", "BP.se")
ses <- grep("\\.se", colnames(Diversity))
Diversity[, ses] %>% gather(key = "est.se") -> se.dat
Diversity[, -unique(ses)] %>% gather(key = "est") -> mean.dat
saveRDS(Diversity, file = paste0("./", Proj_name))
write.csv(Diversity, file = paste0("./", Proj_name))# make combined richness diversity
Richness_Diversity <- cbind(Richness, Diversity)
ses <- grep("\\.se", colnames(Richness_Diversity))
Richness_Diversity[, ses] %>%
gather(key = "est.se") ->
se.dat
Richness_Diversity[, -unique(ses)] %>%
gather(key = "Metric",
value = "Estimate") ->
mean.dat
Richness_Diversity_long <-
cbind(
Sample = rep(rownames(Richness_Diversity), times = length(unique(mean.dat$Metric))),
mean.dat,
lerr = mean.dat$Estimate - se.dat$value,
herr = mean.dat$Estimate + se.dat$value
)
Richness_Diversity_long$Metric <-
factor(
Richness_Diversity_long$Metric,
levels = c("S.obs", "S.chao1", "S.ACE", "Shannon", "Inv.simpson", "BP"),
labels = c("S obs.", "Chao1", "ACE", "Shannon", "Inv. Simpson" , "Berger Parker")
)
Richness_Diversity_long %<>%
cbind(.,
sample_data(Ps_obj_filt))
# S Obs
data2test <- Richness_Diversity_long[Richness_Diversity_long$Metric == "S obs.", ]
(mod_obsS <- TestAlphaV3(filter(Richness_Diversity_long, Metric == "S obs.")))## Call:
## aov(formula = as.formula(paste(response_name, paste(factor_names[1],
## factor_names[2], sep = " * "), sep = " ~ ")), data = data2test)
##
## Terms:
## Location Rock.type Location:Rock.type Residuals
## Sum of Squares 65 705 9787 62082
## Deg. of Freedom 1 1 1 21
##
## Residual standard error: 54.4
## Estimated effects may be unbalanced
## [1] "Unequal group sizes - showing SS type III"
## Anova Table (Type III tests)
##
## Response: Estimate
## Sum Sq Df F value Pr(>F)
## (Intercept) 594952 1 201.25 3.1e-12 ***
## Location 3860 1 1.31 0.266
## Rock.type 7702 1 2.61 0.121
## Location:Rock.type 9787 1 3.31 0.083 .
## Residuals 62082 21
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Tables of means
## Grand mean
##
## 313
##
## Location
## Slope City
## 314 311
## rep 13 12
##
## Rock.type
## Chalk Limestone
## 307 318
## rep 13 12
##
## Location:Rock.type
## Rock.type
## Location Chalk Limestone
## Slope 292 340
## rep 7 6
## City 326 296
## rep 6 6
## Call:
## aov(formula = as.formula(paste(response_name, paste(factor_names[1],
## factor_names[2], sep = " * "), sep = " ~ ")), data = data2test)
##
## Terms:
## Location Rock.type Location:Rock.type Residuals
## Sum of Squares 65 705 9787 62082
## Deg. of Freedom 1 1 1 21
##
## Residual standard error: 54.4
## Estimated effects may be unbalanced
| Location | Rock.type | emmean | SE | df | lower.CL | upper.CL |
|---|---|---|---|---|---|---|
| Slope | Chalk | 292 | 20.6 | 21 | 249 | 334 |
| City | Chalk | 326 | 22.2 | 21 | 280 | 372 |
| Slope | Limestone | 340 | 22.2 | 21 | 294 | 387 |
| City | Limestone | 296 | 22.2 | 21 | 249 | 342 |
## contrast estimate SE df t.ratio p.value
## Slope Chalk - City Chalk -34.6 30.2 21 -1.143 0.6680
## Slope Chalk - Slope Limestone -48.8 30.2 21 -1.614 0.3930
## Slope Chalk - City Limestone -4.1 30.2 21 -0.135 0.9990
## City Chalk - Slope Limestone -14.3 31.4 21 -0.454 0.9680
## City Chalk - City Limestone 30.5 31.4 21 0.971 0.7670
## Slope Limestone - City Limestone 44.8 31.4 21 1.426 0.4980
##
## P value adjustment: tukey method for comparing a family of 4 estimates
(obsS_pairwise <- cld(marginal,
alpha = 0.05,
Letters = letters,
adjust = "tukey")) # works with lm but not with two-factor ART| Location | Rock.type | emmean | SE | df | lower.CL | upper.CL | .group | |
|---|---|---|---|---|---|---|---|---|
| 1 | Slope | Chalk | 292 | 20.6 | 21 | 236 | 347 | a |
| 4 | City | Limestone | 296 | 22.2 | 21 | 235 | 356 | a |
| 2 | City | Chalk | 326 | 22.2 | 21 | 266 | 387 | a |
| 3 | Slope | Limestone | 340 | 22.2 | 21 | 280 | 401 | a |
| Df | Sum Sq | Mean Sq | F value | Pr(>F) | Part Eta Sq |
|---|---|---|---|---|---|
| 1 | 64.6 | 64.6 | 0.022 | 0.884 | 0.001 |
| 1 | 704.8 | 704.8 | 0.238 | 0.630 | 0.010 |
| 1 | 9786.6 | 9786.6 | 3.310 | 0.083 | 0.135 |
| 21 | 62081.9 | 2956.3 | NA | NA | 0.855 |
# pwpp(marginal) # Pairwise P-value plot. Fails for unbalanced design
emmip(mod_obsS, Location ~ Rock.type)# summary(as.glht(pairs(marginal))) # fails because of unbalanced design
# Shannon
(mod_Shannon <- TestAlphaV3(filter(Richness_Diversity_long, Metric == "Shannon")))## Call:
## aov(formula = as.formula(paste(response_name, paste(factor_names[1],
## factor_names[2], sep = " * "), sep = " ~ ")), data = data2test)
##
## Terms:
## Location Rock.type Location:Rock.type Residuals
## Sum of Squares 1.07 0.18 0.16 4.41
## Deg. of Freedom 1 1 1 21
##
## Residual standard error: 0.458
## Estimated effects may be unbalanced
## [1] "Unequal group sizes - showing SS type III"
## Anova Table (Type III tests)
##
## Response: Estimate
## Sum Sq Df F value Pr(>F)
## (Intercept) 313.2 1 1491.16 <2e-16 ***
## Location 1.1 1 5.38 0.03 *
## Rock.type 0.2 1 0.78 0.39
## Location:Rock.type 0.2 1 0.74 0.40
## Residuals 4.4 21
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Tables of means
## Grand mean
##
## 3.55
##
## Location
## Slope City
## 3.75 3.33
## rep 13.00 12.00
##
## Rock.type
## Chalk Limestone
## 3.47 3.64
## rep 13.00 12.00
##
## Location:Rock.type
## Rock.type
## Location Chalk Limestone
## Slope 3.60 3.92
## rep 7.00 6.00
## City 3.33 3.34
## rep 6.00 6.00
## Call:
## aov(formula = as.formula(paste(response_name, paste(factor_names[1],
## factor_names[2], sep = " * "), sep = " ~ ")), data = data2test)
##
## Terms:
## Location Rock.type Location:Rock.type Residuals
## Sum of Squares 1.07 0.18 0.16 4.41
## Deg. of Freedom 1 1 1 21
##
## Residual standard error: 0.458
## Estimated effects may be unbalanced
| Location | Rock.type | emmean | SE | df | lower.CL | upper.CL |
|---|---|---|---|---|---|---|
| Slope | Chalk | 3.60 | 0.173 | 21 | 3.24 | 3.96 |
| City | Chalk | 3.33 | 0.187 | 21 | 2.94 | 3.72 |
| Slope | Limestone | 3.92 | 0.187 | 21 | 3.53 | 4.31 |
| City | Limestone | 3.34 | 0.187 | 21 | 2.95 | 3.73 |
## contrast estimate SE df t.ratio p.value
## Slope Chalk - City Chalk 0.268 0.255 21 1.050 0.7220
## Slope Chalk - Slope Limestone -0.321 0.255 21 -1.258 0.5980
## Slope Chalk - City Limestone 0.264 0.255 21 1.035 0.7310
## City Chalk - Slope Limestone -0.589 0.265 21 -2.225 0.1490
## City Chalk - City Limestone -0.004 0.265 21 -0.015 1.0000
## Slope Limestone - City Limestone 0.585 0.265 21 2.210 0.1530
##
## P value adjustment: tukey method for comparing a family of 4 estimates
(Shannon_pairwise <- cld(marginal,
alpha = 0.05,
Letters = letters,
adjust = "tukey")) # works with lm but not with two-factor ART| Location | Rock.type | emmean | SE | df | lower.CL | upper.CL | .group | |
|---|---|---|---|---|---|---|---|---|
| 2 | City | Chalk | 3.33 | 0.187 | 21 | 2.82 | 3.84 | a |
| 4 | City | Limestone | 3.34 | 0.187 | 21 | 2.83 | 3.85 | a |
| 1 | Slope | Chalk | 3.60 | 0.173 | 21 | 3.13 | 4.07 | a |
| 3 | Slope | Limestone | 3.92 | 0.187 | 21 | 3.41 | 4.43 | a |
| Df | Sum Sq | Mean Sq | F value | Pr(>F) | Part Eta Sq |
|---|---|---|---|---|---|
| 1 | 1.069 | 1.069 | 5.090 | 0.035 | 0.184 |
| 1 | 0.176 | 0.176 | 0.839 | 0.370 | 0.030 |
| 1 | 0.156 | 0.156 | 0.744 | 0.398 | 0.027 |
| 21 | 4.411 | 0.210 | NA | NA | 0.759 |
# pwpp(marginal) # Pairwise P-value plot. Fails for unbalanced design
emmip(mod_Shannon, Location ~ Rock.type)## Call:
## aov(formula = as.formula(paste(response_name, paste(factor_names[1],
## factor_names[2], sep = " * "), sep = " ~ ")), data = data2test)
##
## Terms:
## Location Rock.type Location:Rock.type Residuals
## Sum of Squares 3 1924 8494 70754
## Deg. of Freedom 1 1 1 21
##
## Residual standard error: 58
## Estimated effects may be unbalanced
## [1] "Unequal group sizes - showing SS type III"
## Anova Table (Type III tests)
##
## Response: Estimate
## Sum Sq Df F value Pr(>F)
## (Intercept) 4356003 1 1292.88 <2e-16 ***
## Location 12 1 0.00 0.95
## Rock.type 1634 1 0.48 0.49
## Location:Rock.type 8494 1 2.52 0.13
## Residuals 70754 21
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Tables of means
## Grand mean
##
## 417
##
## Location
## Slope City
## 417 418
## rep 13 12
##
## Rock.type
## Chalk Limestone
## 409 426
## rep 13 12
##
## Location:Rock.type
## Rock.type
## Location Chalk Limestone
## Slope 392 446
## rep 7 6
## City 428 407
## rep 6 6
## Call:
## aov(formula = as.formula(paste(response_name, paste(factor_names[1],
## factor_names[2], sep = " * "), sep = " ~ ")), data = data2test)
##
## Terms:
## Location Rock.type Location:Rock.type Residuals
## Sum of Squares 3 1924 8494 70754
## Deg. of Freedom 1 1 1 21
##
## Residual standard error: 58
## Estimated effects may be unbalanced
| Location | Rock.type | emmean | SE | df | lower.CL | upper.CL |
|---|---|---|---|---|---|---|
| Slope | Chalk | 392 | 21.9 | 21 | 347 | 438 |
| City | Chalk | 428 | 23.7 | 21 | 379 | 477 |
| Slope | Limestone | 446 | 23.7 | 21 | 396 | 495 |
| City | Limestone | 407 | 23.7 | 21 | 358 | 457 |
## contrast estimate SE df t.ratio p.value
## Slope Chalk - City Chalk -35.5 32.3 21 -1.101 0.6930
## Slope Chalk - Slope Limestone -53.2 32.3 21 -1.646 0.3760
## Slope Chalk - City Limestone -14.8 32.3 21 -0.458 0.9670
## City Chalk - Slope Limestone -17.6 33.5 21 -0.525 0.9520
## City Chalk - City Limestone 20.7 33.5 21 0.619 0.9250
## Slope Limestone - City Limestone 38.4 33.5 21 1.144 0.6670
##
## P value adjustment: tukey method for comparing a family of 4 estimates
(ACE_pairwise <- cld(marginal,
alpha = 0.05,
Letters = letters,
adjust = "tukey")) # works with lm but not with two-factor ART| Location | Rock.type | emmean | SE | df | lower.CL | upper.CL | .group | |
|---|---|---|---|---|---|---|---|---|
| 1 | Slope | Chalk | 392 | 21.9 | 21 | 333 | 452 | a |
| 4 | City | Limestone | 407 | 23.7 | 21 | 343 | 472 | a |
| 2 | City | Chalk | 428 | 23.7 | 21 | 363 | 493 | a |
| 3 | Slope | Limestone | 446 | 23.7 | 21 | 381 | 510 | a |
| Df | Sum Sq | Mean Sq | F value | Pr(>F) | Part Eta Sq |
|---|---|---|---|---|---|
| 1 | 2.54 | 2.54 | 0.001 | 0.978 | 0.000 |
| 1 | 1923.90 | 1923.90 | 0.571 | 0.458 | 0.024 |
| 1 | 8494.42 | 8494.42 | 2.521 | 0.127 | 0.105 |
| 21 | 70753.94 | 3369.24 | NA | NA | 0.872 |
# pwpp(marginal) # Pairwise P-value plot. Fails for unbalanced design
emmip(mod_ACE, Location ~ Rock.type)# summary(as.glht(pairs(marginal))) # fails because of unbalanced design
#Inv. Simpson
(mod_InvSim <- TestAlphaV3(filter(Richness_Diversity_long, Metric == "Inv. Simpson")))## Call:
## aov(formula = as.formula(paste(response_name, paste(factor_names[1],
## factor_names[2], sep = " * "), sep = " ~ ")), data = data2test)
##
## Terms:
## Location Rock.type Location:Rock.type Residuals
## Sum of Squares 470 125 99 1025
## Deg. of Freedom 1 1 1 21
##
## Residual standard error: 6.99
## Estimated effects may be unbalanced
## [1] "Unequal group sizes - showing SS type III"
## Anova Table (Type III tests)
##
## Response: Estimate
## Sum Sq Df F value Pr(>F)
## (Intercept) 7498 1 153.60 4e-11 ***
## Location 504 1 10.33 0.0042 **
## Rock.type 117 1 2.40 0.1364
## Location:Rock.type 99 1 2.03 0.1686
## Residuals 1025 21
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Tables of means
## Grand mean
##
## 17.4
##
## Location
## Slope City
## 21.5 12.9
## rep 13.0 12.0
##
## Rock.type
## Chalk Limestone
## 15.2 19.7
## rep 13.0 12.0
##
## Location:Rock.type
## Rock.type
## Location Chalk Limestone
## Slope 17.7 26.0
## rep 7.0 6.0
## City 12.7 13.0
## rep 6.0 6.0
## Call:
## aov(formula = as.formula(paste(response_name, paste(factor_names[1],
## factor_names[2], sep = " * "), sep = " ~ ")), data = data2test)
##
## Terms:
## Location Rock.type Location:Rock.type Residuals
## Sum of Squares 470 125 99 1025
## Deg. of Freedom 1 1 1 21
##
## Residual standard error: 6.99
## Estimated effects may be unbalanced
| Location | Rock.type | emmean | SE | df | lower.CL | upper.CL |
|---|---|---|---|---|---|---|
| Slope | Chalk | 17.7 | 2.64 | 21 | 12.20 | 23.2 |
| City | Chalk | 12.7 | 2.85 | 21 | 6.75 | 18.6 |
| Slope | Limestone | 26.0 | 2.85 | 21 | 20.09 | 32.0 |
| City | Limestone | 13.0 | 2.85 | 21 | 7.09 | 19.0 |
## contrast estimate SE df t.ratio p.value
## Slope Chalk - City Chalk 5.01 3.89 21 1.290 0.5800
## Slope Chalk - Slope Limestone -8.33 3.89 21 -2.140 0.1720
## Slope Chalk - City Limestone 4.67 3.89 21 1.200 0.6330
## City Chalk - Slope Limestone -13.34 4.03 21 -3.310 0.0160
## City Chalk - City Limestone -0.34 4.03 21 -0.090 1.0000
## Slope Limestone - City Limestone 13.00 4.03 21 3.220 0.0200
##
## P value adjustment: tukey method for comparing a family of 4 estimates
(InvSim_pairwise <- cld(marginal,
alpha = 0.05,
Letters = letters,
adjust = "tukey")) # works with lm but not with two-factor ART| Location | Rock.type | emmean | SE | df | lower.CL | upper.CL | .group | |
|---|---|---|---|---|---|---|---|---|
| 2 | City | Chalk | 12.7 | 2.85 | 21 | 4.92 | 20.4 | a |
| 4 | City | Limestone | 13.0 | 2.85 | 21 | 5.26 | 20.8 | a |
| 1 | Slope | Chalk | 17.7 | 2.64 | 21 | 10.50 | 24.9 | ab |
| 3 | Slope | Limestone | 26.0 | 2.85 | 21 | 18.26 | 33.8 | b |
| Df | Sum Sq | Mean Sq | F value | Pr(>F) | Part Eta Sq |
|---|---|---|---|---|---|
| 1 | 470.4 | 470.4 | 9.64 | 0.005 | 0.273 |
| 1 | 125.3 | 125.3 | 2.57 | 0.124 | 0.073 |
| 1 | 99.3 | 99.3 | 2.03 | 0.169 | 0.058 |
| 21 | 1025.0 | 48.8 | NA | NA | 0.596 |
# pwpp(marginal) # Pairwise P-value plot. Fails for unbalanced design
emmip(mod_InvSim, Location ~ Rock.type)# summary(as.glht(pairs(marginal))) # fails because of unbalanced design
#Berger Parker
(mod_BP <- TestAlphaV3(filter(Richness_Diversity_long, Metric == "Berger Parker")))## Call:
## aov(formula = as.formula(paste(response_name, paste(factor_names[1],
## factor_names[2], sep = " * "), sep = " ~ ")), data = data2test)
##
## Terms:
## Location Rock.type Location:Rock.type Residuals
## Sum of Squares 0.0643 0.0133 0.0000 0.1080
## Deg. of Freedom 1 1 1 21
##
## Residual standard error: 0.0717
## Estimated effects may be unbalanced
## [1] "Unequal group sizes - showing SS type III"
## Anova Table (Type III tests)
##
## Response: Estimate
## Sum Sq Df F value Pr(>F)
## (Intercept) 0.924 1 179.69 9.2e-12 ***
## Location 0.066 1 12.92 0.0017 **
## Rock.type 0.013 1 2.59 0.1228
## Location:Rock.type 0.000 1 0.00 0.9939
## Residuals 0.108 21
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Tables of means
## Grand mean
##
## 0.192
##
## Location
## Slope City
## 0.143 0.244
## rep 13.000 12.000
##
## Rock.type
## Chalk Limestone
## 0.214 0.168
## rep 13.000 12.000
##
## Location:Rock.type
## Rock.type
## Location Chalk Limestone
## Slope 0.16 0.12
## rep 7.00 6.00
## City 0.27 0.22
## rep 6.00 6.00
## Call:
## aov(formula = as.formula(paste(response_name, paste(factor_names[1],
## factor_names[2], sep = " * "), sep = " ~ ")), data = data2test)
##
## Terms:
## Location Rock.type Location:Rock.type Residuals
## Sum of Squares 0.0643 0.0133 0.0000 0.1080
## Deg. of Freedom 1 1 1 21
##
## Residual standard error: 0.0717
## Estimated effects may be unbalanced
| Location | Rock.type | emmean | SE | df | lower.CL | upper.CL |
|---|---|---|---|---|---|---|
| Slope | Chalk | 0.164 | 0.027 | 21 | 0.108 | 0.220 |
| City | Chalk | 0.268 | 0.029 | 21 | 0.207 | 0.328 |
| Slope | Limestone | 0.118 | 0.029 | 21 | 0.057 | 0.179 |
| City | Limestone | 0.221 | 0.029 | 21 | 0.160 | 0.282 |
## contrast estimate SE df t.ratio p.value
## Slope Chalk - City Chalk -0.1035 0.0399 21 -2.600 0.0740
## Slope Chalk - Slope Limestone 0.0460 0.0399 21 1.150 0.6620
## Slope Chalk - City Limestone -0.0571 0.0399 21 -1.430 0.4950
## City Chalk - Slope Limestone 0.1495 0.0414 21 3.610 0.0080
## City Chalk - City Limestone 0.0464 0.0414 21 1.120 0.6800
## Slope Limestone - City Limestone -0.1031 0.0414 21 -2.490 0.0910
##
## P value adjustment: tukey method for comparing a family of 4 estimates
(BP_pairwise <- cld(marginal,
alpha = 0.05,
Letters = letters,
adjust = "tukey")) # works with lm but not with two-factor ART| Location | Rock.type | emmean | SE | df | lower.CL | upper.CL | .group | |
|---|---|---|---|---|---|---|---|---|
| 3 | Slope | Limestone | 0.118 | 0.029 | 21 | 0.038 | 0.198 | a |
| 1 | Slope | Chalk | 0.164 | 0.027 | 21 | 0.090 | 0.238 | ab |
| 4 | City | Limestone | 0.221 | 0.029 | 21 | 0.141 | 0.301 | ab |
| 2 | City | Chalk | 0.268 | 0.029 | 21 | 0.188 | 0.347 | b |
| Df | Sum Sq | Mean Sq | F value | Pr(>F) | Part Eta Sq |
|---|---|---|---|---|---|
| 1 | 0.064 | 0.064 | 12.52 | 0.002 | 0.347 |
| 1 | 0.013 | 0.013 | 2.59 | 0.123 | 0.072 |
| 1 | 0.000 | 0.000 | 0.00 | 0.994 | 0.000 |
| 21 | 0.108 | 0.005 | NA | NA | 0.582 |
# pwpp(marginal) # Pairwise P-value plot. Fails for unbalanced design
emmip(mod_BP, Location ~ Rock.type)Richness_Diversity_long %>%
dplyr::filter(!Metric %in% c("Chao1", "ACE")) %>%
mutate_at(., "Metric", ~fct_recode(., "Observed S" = "S obs.", "Inv. Simpson" = "Inv. Simpson", "Berger Parker" = "Berger Parker")) %>%
mutate_at(., "Metric", ~fct_relevel(., "Observed S", "Inv. Simpson", "Shannon", "Berger Parker")) %>%
droplevels() ->
Richness_Diversity_long2plot
p_alpha <- ggplot() +
geom_violin(data = Richness_Diversity_long2plot,
aes(
x = Location,
y = Estimate,
ymin = lerr,
ymax = herr
), colour = "grey",
fill = "grey",
alpha = 1 / 3) +
geom_jitter2(data = Richness_Diversity_long2plot,
aes(x = Location,
y = Estimate,
ymin = lerr,
ymax = herr,
colour = Location), size = 3, width = 0.2, alpha = 2/3) +
scale_colour_manual(values = Gradient.colours[c(5, 6, 11)], name = "") +
# geom_errorbar(alpha = 1 / 2, width = 0.3) +
xlab("") +
ylab("") +
theme(axis.text.x = element_text(
angle = 45,
vjust = 0.9,
hjust = 1
),
legend.position="none") +
facet_grid(Metric ~ Rock.type, scale = "free") +
background_grid(major = "y",
minor = "none",
size.major = 0.8)
dat_text <- data.frame(
label = str_remove_all(c(obsS_pairwise$.group[1:4],
Shannon_pairwise$.group[1:4],
InvSim_pairwise$.group[1:4],
BP_pairwise$.group[1:4]),
pattern = " "),
Metric = fct_inorder(rep(levels(Richness_Diversity_long2plot$Metric), each = 4)),
Rock.type = fct_c(obsS_pairwise$Rock.type[1:4],
Shannon_pairwise$Rock.type[1:4],
InvSim_pairwise$Rock.type[1:4],
BP_pairwise$Rock.type[1:4]),
x = fct_c(obsS_pairwise$Location[1:4],
Shannon_pairwise$Location[1:4],
InvSim_pairwise$Location[1:4],
BP_pairwise$Location[1:4]),
# x = as.factor(levels(Richness_Diversity_long2plot$Climate.Source)),
y = rep(c(520, 45, 5.2, 0.52), each = 4)
# y = rep(c(40, 140, 0.5), each = 6)
)
p_alpha <- p_alpha + geom_text(
data = dat_text,
mapping = aes(x = x, y = y, label = label),
nudge_x = 0,
nudge_y = 0
)
print(p_alpha)Calculate and plot beta diversity metrics.
(mod1 <- adonis(
otu_table(Ps_obj_filt_GMPR) ~ Location * Rock.type,
data = as(sample_data(Ps_obj_filt_GMPR), "data.frame"),
method = "horn",
permutations = 9999
))##
## Call:
## adonis(formula = otu_table(Ps_obj_filt_GMPR) ~ Location * Rock.type, data = as(sample_data(Ps_obj_filt_GMPR), "data.frame"), permutations = 9999, method = "horn")
##
## Permutation: free
## Number of permutations: 9999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## Location 1 0.63 0.628 2.11 0.081 0.023 *
## Rock.type 1 0.39 0.386 1.30 0.050 0.245
## Location:Rock.type 1 0.49 0.485 1.63 0.063 0.098 .
## Residuals 21 6.25 0.297 0.806
## Total 24 7.75 1.000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(mod2 <- adonis(
otu_table(Ps_obj_filt_GMPR) ~ Location,
data = as(sample_data(Ps_obj_filt_GMPR), "data.frame"),
method = "horn",
permutations = 9999
))##
## Call:
## adonis(formula = otu_table(Ps_obj_filt_GMPR) ~ Location, data = as(sample_data(Ps_obj_filt_GMPR), "data.frame"), permutations = 9999, method = "horn")
##
## Permutation: free
## Number of permutations: 9999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## Location 1 0.63 0.628 2.03 0.081 0.029 *
## Residuals 23 7.12 0.309 0.919
## Total 24 7.75 1.000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
mod1_pairwise <- PairwiseAdonis(
otu_table(Ps_obj_filt_GMPR),
sample_data(Ps_obj_filt_GMPR)$Location,
sim.function = "vegdist",
sim.method = "horn",
p.adjust.m = "BH"
)
print(mod1_pairwise)## pairs total.DF F.Model R2 p.value p.adjusted sig
## 1 City - Slope 24 2.03 0.0811 0.034 0.034 .
## [[1]]
## [1] "City - Slope"
mod2_pairwise <- PairwiseAdonis(
otu_table(Ps_obj_filt_GMPR),
sample_data(Ps_obj_filt_GMPR)$Rock.type,
sim.function = "vegdist",
sim.method = "horn",
p.adjust.m = "BH"
)
print(mod2_pairwise)## pairs total.DF F.Model R2 p.value p.adjusted sig
## 1 Chalk - Limestone 24 1.21 0.0501 0.32 0.32
## [[1]]
## character(0)
mod3_pairwise <- PairwiseAdonis(
otu_table(Ps_obj_filt_GMPR),
sample_data(Ps_obj_filt_GMPR)$Location.rock,
sim.function = "vegdist",
sim.method = "horn",
p.adjust.m = "BH"
)
print(mod3_pairwise)## pairs total.DF F.Model R2 p.value p.adjusted sig
## 1 City:Chalk - City:Limestone 11 1.20 0.1069 0.344 0.413
## 2 City:Chalk - Slope:Limestone 11 1.43 0.1251 0.180 0.270
## 3 City:Chalk - Slope:Chalk 12 2.92 0.2099 0.017 0.102
## 4 City:Limestone - Slope:Limestone 11 1.07 0.0964 0.429 0.429
## 5 City:Limestone - Slope:Chalk 12 1.98 0.1528 0.091 0.182
## 6 Slope:Limestone - Slope:Chalk 12 1.90 0.1471 0.080 0.182
## [[1]]
## character(0)
Unifrac_mat <- UniFrac(Ps_obj_filt,
weighted = TRUE,
normalized = TRUE,
parallel = TRUE,
fast = TRUE)
(mod1 <- adonis(
Unifrac_mat ~ Location * Rock.type,
data = as(sample_data(Ps_obj_filt_GMPR), "data.frame"),
method = "horn",
permutations = 9999
))##
## Call:
## adonis(formula = Unifrac_mat ~ Location * Rock.type, data = as(sample_data(Ps_obj_filt_GMPR), "data.frame"), permutations = 9999, method = "horn")
##
## Permutation: free
## Number of permutations: 9999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## Location 1 0.047 0.0469 1.37 0.053 0.214
## Rock.type 1 0.053 0.0529 1.54 0.060 0.147
## Location:Rock.type 1 0.063 0.0632 1.84 0.072 0.068 .
## Residuals 21 0.719 0.0342 0.815
## Total 24 0.882 1.000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(mod2 <- adonis(
Unifrac_mat ~ Location,
data = as(sample_data(Ps_obj_filt_GMPR), "data.frame"),
method = "horn",
permutations = 9999
))##
## Call:
## adonis(formula = Unifrac_mat ~ Location, data = as(sample_data(Ps_obj_filt_GMPR), "data.frame"), permutations = 9999, method = "horn")
##
## Permutation: free
## Number of permutations: 9999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## Location 1 0.047 0.0469 1.29 0.053 0.26
## Residuals 23 0.835 0.0363 0.947
## Total 24 0.882 1.000
The difference between city and slope is significant based on the Morisita-Horn distances between ASV but
Ps_obj_ord1 <- ordinate(Ps_obj_filt_GMPR, "CAP", "horn", formula = Ps_obj_filt_GMPR ~ Location * Rock.type)
Ps_obj_ord2 <- ordinate(Ps_obj_filt_GMPR, "CAP", "horn", formula = Ps_obj_filt_GMPR ~ Location)
explained <- eigenvals(Ps_obj_ord2)/sum( eigenvals(Ps_obj_ord2)) * 100
explained <- as.numeric(format(round(explained, 1), nsmall = 1))
Ps_obj_filt_GMPR %>%
plot_ordination(., Ps_obj_ord2, type = "samples", shape = "Rock.type", color = "Location", justDF = TRUE) ->
ord_df
p_ord <- ggplot(ord_df,
aes(
x = CAP1,
y = MDS1,
shape = Rock.type,
color = Location
)) +
stat_ellipse(
aes(x = CAP1,
y = MDS1,
fill = Location
),
geom = "polygon",
alpha = 1/4,
type = "t",
level = 0.9,
# linetype = 2,
inherit.aes = FALSE
) +
geom_point(size = 4, alpha = 2 / 3) +
guides(colour = guide_legend(title = "Location"), shape = guide_legend(title = "Rock.type")) +
scale_colour_manual(values = Gradient.colours) +
scale_fill_manual(values = Gradient.colours, guide = "none") +
labs(x = sprintf("CAP1 (%s%%)", explained[1]),
y = sprintf("CAP2 (%s%%)", explained[2])) +
coord_fixed(ratio = sqrt(explained[2] / explained[1])) +
theme(legend.justification = "top")
# facet_wrap(. ~ Rock.type)
print(p_ord)Ps_obj_ord1 <- ordinate(Ps_obj_filt, "PCoA", "Unifrac", formula = Ps_obj_filt ~ Location * Rock.type)
Ps_obj_ord2 <- ordinate(Ps_obj_filt, "PCoA", "Unifrac", formula = Ps_obj_filt ~ Location)
explained <- Ps_obj_ord2$values$Relative_eig/sum(Ps_obj_ord2$values$Relative_eig) * 100
explained <- as.numeric(format(round(explained, 1), nsmall = 1))
Ps_obj_filt %>%
plot_ordination(., Ps_obj_ord2, type = "samples", shape = "Rock.type", color = "Location", justDF = TRUE) ->
ord_df
p_ord_phylo <- ggplot(ord_df,
aes(
x = Axis.1,
y = Axis.2,
shape = Rock.type,
color = Location
)) +
stat_ellipse(
aes(x = Axis.1,
y = Axis.2,
fill = Location
),
geom = "polygon",
alpha = 1/4,
type = "t",
level = 0.9,
# linetype = 2,
inherit.aes = FALSE
) +
geom_point(size = 4, alpha = 2 / 3) +
theme_bw(base_size = 14) +
guides(colour = guide_legend(title = "Location"), shape = guide_legend(title = "Rock.type")) +
scale_colour_manual(values = Gradient.colours) +
scale_fill_manual(values = Gradient.colours, guide = "none") +
labs(x = sprintf("CAP1 (%s%%)", explained[1]),
y = sprintf("CAP2 (%s%%)", explained[2])) +
coord_fixed(ratio = sqrt(explained[2] / explained[1])) #+
# facet_wrap(. ~ Rock.type)
print(p_ord_phylo)STAMPR analysis of the differences of each phylum between locations using Aligned Rank Transformed ANOVA test and a post-hoc estimated marginal means.
Taxa_tests_phylum1 <- STAMPR2(Ps_obj_filt, vars2test = "Location", threshold = 0.001, outputfile = paste0(Proj_name, "_Location"))
pSTAMPR1 <- plotSTAMPR(Taxa_tests_phylum1, pair = "Slope - City")
print(pSTAMPR1)Taxa_tests_phylum2 <- STAMPR2(Ps_obj_filt, vars2test = c("Location", "Rock.type"), threshold = 0.001, outputfile = paste0(Proj_name, "_Location_Rock"))
pSTAMPR2 <- plotSTAMPR(Taxa_tests_phylum2, pair = "Slope:Chalk - City:Chalk")
print(pSTAMPR2)Agglomerate data and tag rare taxa
Ps_obj_filt_GMPR_ra <- transform_sample_counts(Ps_obj_filt_GMPR, function(x){x / sum(x)} * 100)
Ps_obj_filt_GMPR_glom <- tax_glom(Ps_obj_filt_GMPR_ra,
"Phylum",
NArm = TRUE)
Ps_obj_filt_GMPR_glom_DF <- speedyseq::psmelt(Ps_obj_filt_GMPR_glom)
Ps_obj_filt_GMPR_glom_DF$Phylum %<>% as.character()
# Ps_obj_filt3_glom_DF %<>% mutate(Species = fct_relevel(Species, "NA", after = Inf))
# group dataframe by Phylum, calculate median rel. abundance
Ps_obj_filt_GMPR_glom_DF %>%
group_by(Phylum) %>%
summarise(median = median(Abundance)) ->
medians
# find Phyla whose rel. abund. is less than 1%
Rare_phyla0.01 <- medians[medians$median <= 0.01, ]$Phylum
# change their name to "Rare"
Ps_obj_filt_GMPR_glom_DF[Ps_obj_filt_GMPR_glom_DF$Phylum %in% Rare_phyla0.01, ]$Phylum <- 'Rare'
# re-group
Ps_obj_filt_GMPR_glom_DF %>%
group_by(Sample, Phylum, Location, Rock.type, Location.rock) %>%
summarise(Abundance = sum(Abundance)) ->
Ps_obj_filt_GMPR_glom_DF_2plot
# ab.taxonomy$Freq <- sqrt(ab.taxonomy$Freq)
# Ps_obj_filt3_glom_rel_DF$Phylum %<>% sub("unclassified", "Unclassified", .)
# Ps_obj_filt3_glom_rel_DF$Phylum %<>% sub("uncultured", "Unclassified", .)
Ps_obj_filt_GMPR_glom_DF_2plot %>%
group_by(Sample) %>%
filter(Phylum == "Rare") %>%
summarise(`Rares (%)` = sum(Abundance)) ->
RaresSummarise taxonomy
# Percentage of reads classified as rare
Rares %>%
kable(., digits = c(2), caption = "Percentage of reads per sample classified as rare:") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), full_width = F)| Sample | Rares (%) |
|---|---|
| ShivSClk1S12 | 0.01 |
| ShivSClk2S15 | 0.59 |
| ShivSClk3S16 | 0.02 |
| ShivSClk4S17 | 0.01 |
| ShivSClk5S18 | 0.01 |
| ShivSLimek1S13 | 0.02 |
| ShivSLimek2S14 | 0.04 |
| ShivSLimek3S19 | 14.27 |
| ShivSLimek4S20 | 11.48 |
| ShivSLimek5S21 | 0.01 |
| ShivSLimek6S25 | 0.01 |
| SlpClk1S3 | 0.24 |
| SlpClk2S4 | 0.04 |
| SlpClk3S5 | 0.04 |
| SlpClk4S6 | 0.03 |
| SlpClk5S7 | 0.01 |
| SlpClk6S8 | 0.03 |
| SlpClk7S9 | 0.01 |
| SlpClk8S10 | 0.03 |
| SlpClk9S11 | 0.05 |
| SlpLime1S1 | 0.01 |
| SlpLime2S2 | 0.04 |
| SlpLime3S22 | 0.09 |
| SlpLime4S23 | 0.01 |
| SlpLime5S24 | 0.02 |
sample_order <- match(Rares$Sample, row.names(sample_data(Ps_obj_filt_GMPR_glom)))
Rares %<>% arrange(., sample_order)
Rares %>%
cbind(., sample_data(Ps_obj_filt_GMPR_glom)) %>%
group_by(Location.rock) %>%
setNames(make.names(names(.), unique = TRUE)) %>% # fails for some reason without it
summarise(`Rares (%)` = mean(`Rares....`)) ->
Rares_merged
# Percentage of reads classified as rare
Rares_merged %>%
kable(., digits = c(2), caption = "Percentage of reads per sample classified as rare:") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), full_width = F)| Location.rock | Rares (%) |
|---|---|
| City:Chalk | 0.11 |
| City:Limestone | 4.31 |
| Slope:Chalk | 0.03 |
| Slope:Limestone | 0.07 |
Plot taxonomy box-plot
Ps_obj_filt_GMPR_glom_DF_2plot %>%
group_by(Phylum) %>%
summarise(sum.Taxa = sum(Abundance)) %>%
arrange(desc(sum.Taxa)) -> Taxa_rank
Ps_obj_filt_GMPR_glom_DF_2plot$Phylum %<>%
factor(., levels = Taxa_rank$Phylum) %>%
fct_relevel(., "Rare", after = Inf)
p_taxa_box <-
ggplot(Ps_obj_filt_GMPR_glom_DF_2plot, aes(x = Phylum, y = (Abundance))) +
geom_boxplot(aes(group = interaction(Phylum, Location)), position = position_dodge(width = 0.9), fatten = 1) +
geom_point(
aes(colour = Rock.type),
position = position_jitterdodge(dodge.width = 1),
alpha = 1 / 2,
stroke = 0,
size = 2
) +
scale_colour_manual(values = Gradient.colours, name = "") +
theme_bw()+
# theme_cowplot(font_size = 11, font_family = f_name) +
labs(x = NULL, y = "Relative abundance (%)") +
guides(colour = guide_legend(override.aes = list(size = 5))) +
facet_grid(Location ~ .) +
background_grid(major = "xy",
minor = "none") +
theme(axis.text.x = element_text(
angle = 45,
vjust = 0.9,
hjust = 0.9
),
legend.position = c(.99, .99),
legend.justification = c("right", "top"),
legend.box.just = "top",
legend.margin = margin(0, 3, 3, 3))
print(p_taxa_box)Tag rare phyla (for plotting purposes only)
Ps_obj_filt_GMPR_glom <- tax_glom(Ps_obj_filt_GMPR,
"Phylum",
NArm = TRUE) # glomerate to the phylum level
Ps_obj_filt_GMPR_glom_rel <- transform_sample_counts(Ps_obj_filt_GMPR_glom, function(x) x / sum(x)) # transform to rel. ab.
Ps_obj_filt_GMPR_glom_rel_DF <- speedyseq::psmelt(Ps_obj_filt_GMPR_glom_rel) # generate a df
Ps_obj_filt_GMPR_glom_rel_DF$Phylum %<>% as.character() # factor to char
# group dataframe by Phylum, calculate median rel. abundance
Ps_obj_filt_GMPR_glom_rel_DF %>%
group_by(Phylum) %>%
summarise(median = median(Abundance)) ->
medians
# find Phyla whose median rel. abund. is less than 0.5%
Rare_phyla0.005 <- medians[medians$median <= 0.005, ]$Phylum
# change their name to "Rare"
Ps_obj_filt_GMPR_glom_rel_DF[Ps_obj_filt_GMPR_glom_rel_DF$Phylum %in% Rare_phyla0.005, ]$Phylum <- 'Rare'
# re-group
Ps_obj_filt_GMPR_glom_rel_DF %>%
group_by(Phylum) %>%
summarise(Abundance = sum(Abundance)) %>%
arrange(desc(Abundance)) -> Taxa_rankDetect differentially abundant ASVs using corncob (Martin, Witten and Willis 2020)
comparison_string <- c("City", "Slope")
Ps_obj_filt %>%
subset_samples(Location %in% c(comparison_string[1], comparison_string[2])) %>%
tax_glom("Order") ->
Ps_obj_filt_pairwise_glom
# Test differential abundance for location
da_Loc <- differentialTest(formula = ~ Location,
phi.formula = ~ Location,
formula_null = ~ 1,
phi.formula_null = ~ Location,
test = "Wald", boot = FALSE,
data = Ps_obj_filt,
fdr_cutoff = 0.05,
full_output = TRUE)
da_Loc_intervals <- plot(da_Loc, level = "Class", data_only = T)
which(is.na(da_Loc$p)) %>% names## [1] "OTU398" "OTU164" "OTU476" "OTU641" "OTU239" "OTU481" "OTU727" "OTU451" "OTU245"
Ps_obj_filt %>%
transform_sample_counts(., function(x) x / sum(x) * 100) %>%
taxa_sums(.) %>%
map_dbl(~(.x / nsamples(Ps_obj_filt))) %>%
enframe(name = "OTU", value = "Mean abundance (%)") ->
baseMean
map(da_Loc$all_models,15) %>%
map(.,2) %>%
unlist %>% # grab all mu.LocationSlope Estimates (differences in estimated population relative abundance)
bind_cols(OTU = taxa_names(Ps_obj_filt),
tax_table(Ps_obj_filt),
`Differential abundance` = .,
Significance = fct_recode(as_factor(taxa_names(Ps_obj_filt) %in% da_Loc$significant_taxa), Pass = "TRUE", Fail = "FALSE"),
ymin = as.numeric(NA),
ymax = as.numeric(NA)
) %>%
left_join(., baseMean, by = "OTU") ->
da_Loc_df
da_Loc_df %<>% rows_update(., tibble(ymin = da_Loc_intervals$xmin, OTU = da_Loc$significant_taxa), by = "OTU")
da_Loc_df %<>% rows_update(., tibble(ymax = da_Loc_intervals$xmax, OTU = da_Loc$significant_taxa), by = "OTU")
da_Loc_df[da_Loc_df$Phylum %in% Rare_phyla0.005, "Phylum"] <- 'Rare' # rare_phyla is
p_corncob_loc <- GGPlotCorncob(da_Loc_df, OTU_labels = FALSE, Taxa = "Phylum", Y_val = "Differential abundance", sig_level = 0.05, Rank = Taxa_rank)
corncob_summary <- tibble(Label = c(paste0("⬆", sum(da_Loc_df$`Differential abundance` > 0 & da_Loc_df$Significance == "Pass"), " ⬇", sum(da_Loc_df$`Differential abundance` < 0 & da_Loc_df$Significance == "Pass"), " (", nrow(da_Loc_df), ")")))
p_corncob_loc <- p_corncob_loc +
geom_text(
data = corncob_summary,
mapping = aes(x = Inf, y = Inf, label = Label),
hjust = 1.1,
vjust = 1.6
) +
scale_size_continuous(name = "Mean abundance (%)",
limits = c(min(da_Loc_df$`Mean abundance (%)`), max(da_Loc_df$`Mean abundance (%)`))
) +
labs(title = paste(comparison_string, collapse = " - ")) +
coord_cartesian(ylim = c(-15, 15))
print(p_corncob_loc)Modelling differential abundance and variance between locations discovered length(da_Loc$significant_taxa)
comparison_string <- c("Limestone", "Chalk")
# Test differential abundance and variance for rock type
da_Rock <- differentialTest(formula = ~ Rock.type,
phi.formula = ~ Rock.type,
formula_null = ~ 1,
phi.formula_null = ~ Rock.type,
test = "Wald", boot = FALSE,
data = Ps_obj_filt,
fdr_cutoff = 0.05,
full_output = TRUE)
da_Rock_intervals <- plot(da_Rock, level = "Class", data_only = TRUE)
which(is.na(da_Rock$p)) %>% names## [1] "OTU780" "OTU164" "OTU580" "OTU679" "OTU696" "OTU712" "OTU519" "OTU503" "OTU542"
## [10] "OTU451" "OTU114" "OTU309" "OTU502" "OTU245" "OTU229"
map(da_Rock$all_models,15) %>%
map(.,2) %>%
unlist %>% # grab all mu.LocationSlope Estimates (differences in estimated population relative abundance)
bind_cols(OTU = taxa_names(Ps_obj_filt),
tax_table(Ps_obj_filt),
`Differential abundance` = .,
Significance = fct_recode(as_factor(taxa_names(Ps_obj_filt) %in% da_Rock$significant_taxa), Pass = "TRUE", Fail = "FALSE"),
ymin = as.numeric(NA),
ymax = as.numeric(NA)
) %>%
left_join(., baseMean, by = "OTU") ->
da_Rock_df
da_Rock_df %<>% rows_update(., tibble(ymin = da_Rock_intervals$xmin, OTU = da_Rock$significant_taxa), by = "OTU")
da_Rock_df %<>% rows_update(., tibble(ymax = da_Rock_intervals$xmax, OTU = da_Rock$significant_taxa), by = "OTU")
da_Rock_df[da_Rock_df$Phylum %in% Rare_phyla0.005, "Phylum"] <- 'Rare' # rare_phyla is
p_corncob_rock <- GGPlotCorncob(da_Rock_df, OTU_labels = FALSE, Taxa = "Phylum", Y_val = "Differential abundance", sig_level = 0.05, Rank = Taxa_rank)
corncob_summary <- tibble(Label = c(paste0("⬆", sum(da_Rock_df$`Differential abundance` > 0 & da_Rock_df$Significance == "Pass"), " ⬇", sum(da_Rock_df$`Differential abundance` < 0 & da_Rock_df$Significance == "Pass"), " (", nrow(da_Rock_df), ")")))
p_corncob_rock <- p_corncob_rock +
geom_text(
data = corncob_summary,
mapping = aes(x = Inf, y = Inf, label = Label),
hjust = 1.1,
vjust = 1.6
) +
scale_size_continuous(name = "Mean abundance (%)",
limits = c(min(da_Rock_df$`Mean abundance (%)`), max(da_Rock_df$`Mean abundance (%)`))
) +
labs(title = paste(comparison_string, collapse = " - ")) +
coord_cartesian(ylim = c(-15, 15))
print(p_corncob_rock)Modelling differential abundance and variance between rock types discovered length(da_Rock$significant_taxa)
# Test differential abundance for location, control for Rock.type for both cases
comparison_string <- c("City", "Slope")
da_Loc_exRock <- differentialTest(formula = ~ Location + Rock.type,
phi.formula = ~ Location + Rock.type,
formula_null = ~ Rock.type,
phi.formula_null = ~ Location + Rock.type,
test = "Wald", boot = FALSE,
data = Ps_obj_filt,
fdr_cutoff = 0.05,
full_output = TRUE)
da_Loc_exRock_intervals <- plot(da_Loc_exRock, level = "Class", data_only = TRUE)
which(is.na(da_Loc_exRock$p)) %>% names## [1] "OTU780" "OTU398" "OTU164" "OTU476" "OTU580" "OTU679" "OTU696" "OTU712" "OTU641"
## [10] "OTU519" "OTU239" "OTU481" "OTU503" "OTU542" "OTU727" "OTU451" "OTU114" "OTU309"
## [19] "OTU502" "OTU245" "OTU229"
map(da_Loc_exRock$all_models,15) %>%
map(.,2) %>%
unlist %>% # grab all mu.LocationSlope Estimates (differences in estimated population relative abundance)
bind_cols(OTU = taxa_names(Ps_obj_filt),
tax_table(Ps_obj_filt),
`Differential abundance` = .,
Significance = fct_recode(as_factor(taxa_names(Ps_obj_filt) %in% da_Loc_exRock$significant_taxa), Pass = "TRUE", Fail = "FALSE"),
ymin = as.numeric(NA),
ymax = as.numeric(NA)
) %>%
left_join(., baseMean, by = "OTU") ->
da_Loc_exRock_df
da_Loc_exRock_df %<>% rows_update(., tibble(ymin = da_Loc_exRock_intervals$xmin, OTU = da_Loc_exRock$significant_taxa), by = "OTU")
da_Loc_exRock_df %<>% rows_update(., tibble(ymax = da_Loc_exRock_intervals$xmax, OTU = da_Loc_exRock$significant_taxa), by = "OTU")
da_Loc_exRock_df[da_Loc_exRock_df$Phylum %in% Rare_phyla0.005, "Phylum"] <- 'Rare' # rare_phyla is
p_corncob_locExroc <- GGPlotCorncob(da_Loc_exRock_df, OTU_labels = FALSE, Taxa = "Phylum", Y_val = "Differential abundance", sig_level = 0.05, Rank = Taxa_rank)
corncob_summary <- tibble(Label = c(paste0("⬆", sum(da_Loc_exRock_df$`Differential abundance` > 0 & da_Loc_exRock_df$Significance == "Pass"), " ⬇", sum(da_Loc_exRock_df$`Differential abundance` < 0 & da_Loc_exRock_df$Significance == "Pass"), " (", nrow(da_Loc_exRock_df), ")")))
p_corncob_locExroc <- p_corncob_locExroc +
geom_text(
data = corncob_summary,
mapping = aes(x = Inf, y = Inf, label = Label),
hjust = 1.1,
vjust = 1.6
) +
scale_size_continuous(name = "Mean abundance (%)",
limits = c(min(da_Loc_exRock_df$`Mean abundance (%)`), max(da_Loc_exRock_df$`Mean abundance (%)`))
) +
labs(title = paste(comparison_string, collapse = " - ")) +
coord_cartesian(ylim = c(-15, 15))
print(p_corncob_locExroc)Modelling differential abundance between locations, while controlling for rock type discovered length(da_Loc_exRock$significant_taxa)
mod260 <- bbdml(formula = OTU260 ~ 1,
phi.formula = ~ 1,
data = Ps_obj_filt)
mod260_Loc <- bbdml(formula = OTU260 ~ Location,
phi.formula = ~ Location,
data = Ps_obj_filt)
mod260_Loc_rock <- bbdml(formula = OTU97 ~ Location*Rock.type,
phi.formula = ~ Location*Rock.type,
data = Ps_obj_filt)
lrtest(mod_null = mod260, mod = mod260_Loc)## [1] 0.002903761
##
## Call:
## bbdml(formula = OTU260 ~ Location, phi.formula = ~Location, data = Ps_obj_filt)
##
##
## Coefficients associated with abundance:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.8861 0.4137 -19.060 9.8e-15 ***
## LocationCity -1.3511 0.4786 -2.823 0.0102 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
##
## Coefficients associated with dispersion:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.1075 0.5436 -13.076 1.47e-11 ***
## LocationCity -2.8542 0.8170 -3.493 0.00217 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
##
## Log-likelihood: -80.58
plot(mod260_Loc, color = "Location", shape = "Rock.type") # add total = TRUE for total counts (i.e. not relative abundance)composite_plot <- ((p_alpha + p_taxa_box + plot_layout(widths = c(1, 2))) /(p_ord + pSTAMPR1) + plot_annotation(tag_levels = 'A') & theme(plot.tag = element_text(size = f_size)))
plot_file <- "Microbiome_1"
svglite(paste0(plot_file, ".svg"),
width = 14,
height = 10.5)
print(composite_plot)
dev.off()## pdf
## 2
ggsave(
paste0(plot_file, ".png"),
composite_plot,
device = agg_png,
width = 14,
height = 10.5,
units = "cm",
res = 900,
scaling = 0.38
)
gz(paste0(plot_file, ".svg"), paste0(plot_file, ".svgz"))
knitr::include_graphics(paste0(plot_file, ".png"))plot_file <- "Microbiome_2"
svglite(paste0(plot_file, ".svg"),
width = 12,
height = 10)
print(p_corncob_locExroc)
dev.off()## pdf
## 2
Chen J, Chen L. GMPR: A novel normalization method for microbiome sequencing data. bioRxiv 2017:112565.
Martin BD, Witten D, Willis AD. Modeling microbial abundances and dysbiosis with beta-binomial regression. Ann Appl Stat 2020;14:94–115.